# nusmods: the nusmods API at https://nusmods.com/api/.
# load bidding data
# Time the download-and-filter step.
before <- Sys.time()
# Read the raw bidding statistics directly from the URL. The code below treats
# every element of `myjson` as one record carrying a `Semester` field.
myjson <- fromJSON(file = url("https://api.nusmods.com/corsBiddingStatsRaw.json"))
# Flag the records whose Semester is 1 or 2. isTRUE() turns a record with a
# missing Semester into "not kept" instead of aborting the whole run, and
# vapply() over the list is also safe when the list is empty.
in_regular_semester <- vapply(
  myjson,
  function(record) isTRUE(record$Semester %in% c(1, 2)),
  logical(1)
)
# Turn every kept record into a one-row data frame and bind them in a single
# call. Growing `myBid` with rbind() inside a loop copies the whole frame on
# every iteration. stringsAsFactors = FALSE keeps text as character on
# R versions older than 4.0 as well.
bid_rows <- lapply(
  myjson[in_regular_semester],
  as.data.frame,
  stringsAsFactors = FALSE
)
myBid <- if (length(bid_rows) > 0) do.call(rbind, bid_rows) else data.frame()
# The parsed JSON and the intermediate rows are not needed once `myBid` exists.
rm(myjson, bid_rows)
# Report the elapsed time.
after <- Sys.time()
after - before
# save
saveRDS(myBid, file = "myBid.RDS")
# myBid.RDS
# Collect the timetable records of PL modules, one request per academic year
# and semester, then build `myModInfo` from them in a single step.
pl_records <- list()
# looping through each year
for (year in 2011:2018) {
  for (semester in c(1, 2)) {
    # create the url where data is to be extracted from
    myurl <- paste0("https://api.nusmods.com/", year, "-", year + 1, "/", semester, "/moduleTimetableDeltaRaw.json")
    myjson <- fromJSON(file = url(myurl))
    # Keep a record only when its ModuleCode starts with "PL" and its Semester
    # is 1 or 2. isTRUE() makes records that lack either field count as
    # "not kept" rather than stopping the loop.
    is_pl_record <- vapply(
      myjson,
      function(record) {
        isTRUE(str_detect(record$ModuleCode, "^PL")) &&
          isTRUE(record$Semester %in% c(1, 2))
      },
      logical(1)
    )
    pl_records <- c(pl_records, myjson[is_pl_record])
    # Trailing newline so each progress message lands on its own line.
    cat(year, "Semester", semester, "Done!\n")
  }
}
# One row per kept record, bound once instead of rbind()-ing inside the loops.
pl_rows <- lapply(pl_records, as.data.frame, stringsAsFactors = FALSE)
myModInfo <- if (length(pl_rows) > 0) do.call(rbind, pl_rows) else data.frame()
# save
saveRDS(myModInfo, file = "myModInfo.RDS")
# myModInfo.RDS myModInfo myModInfo.
# myBid myBid.
# Modules that have at least one bidding row whose Group contains "2".
# Computed on the unfiltered `myBid`, before the pipeline below drops rows.
multi_group_modules <- unique(myBid$ModuleCode[grepl("2", myBid$Group)])
myBid <- myBid %>%
  # keep module codes starting with "PL" ...
  filter(str_detect(ModuleCode, "^PL")) %>%
  # ... but not the ones containing "PLS" or "PLB"
  filter(!str_detect(ModuleCode, "PLS|PLB")) %>%
  filter(!str_detect(StudentAcctType, "Reserved")) %>%
  # "[G]" as a regex is a character class that matches any "G"; fixed()
  # matches the literal bracketed tag instead.
  filter(!str_detect(StudentAcctType, fixed("[G]"))) %>%
  # Exact set membership. The earlier form passed the ModuleCode column as the
  # regex pattern against one "|"-joined string, i.e. a substring test.
  filter(!(ModuleCode %in% multi_group_modules)) %>%
  select(-Faculty, -Group)
head(myBid)
# myModInfo and myBid.
# transform these columns to numeric
# Columns converted to numeric and columns converted to factor.
numeric_cols <- c("Quota", "Bidders", "LowestBid", "LowestSuccessfulBid", "HighestBid", "StartTime")
factor_cols <- c("AcadYear", "Semester", "ModuleCode", "Round", "StudentAcctType", "DayText")
# Columns are addressed by exact name, so say so when one is absent instead of
# silently converting nothing.
absent_cols <- setdiff(c(numeric_cols, factor_cols), names(mydata))
if (length(absent_cols) > 0) {
  warning(
    "Columns not found in mydata: ", paste(absent_cols, collapse = ", "),
    call. = FALSE
  )
}
for (col in intersect(numeric_cols, names(mydata))) {
  values <- mydata[[col]]
  # as.numeric() on a factor returns the internal level codes, not the
  # values, so go through character first.
  if (is.factor(values)) {
    values <- as.character(values)
  }
  mydata[[col]] <- as.numeric(values)
}
# transform these columns to factors
for (col in intersect(factor_cols, names(mydata))) {
  mydata[[col]] <- factor(mydata[[col]])
}
# create vector of the column names which are factors
# names of the factor columns of mydata
facnames <- names(mydata)[vapply(mydata, is.factor, logical(1))]
# factor names without ModuleCode
# (a logical mask is used because x[-grep(...)] returns an empty vector when
# nothing matches)
facnames.mod <- facnames[!grepl("ModuleCode", facnames)]
# create vector of the column names which are numeric
numnames <- names(mydata)[vapply(mydata, is.numeric, logical(1))]
# numeric names without StartTime
numnames.time <- numnames[!grepl("StartTime", numnames)]
# DayText Levels
# Bidders is calculated across all academic years, all bidding rounds, all modules…
## AcadYear Semester Round ModuleCode Quota Bidders LowestBid LowestSuccessfulBid HighestBid StudentAcctType DayText StartTime
## 2013/2014:329 1:799 1A:496 PL3232 : 74 Min. : 1.00 Min. : 0.00 Min. : 0.00 Min. : 0.0 Min. : 0.0 New[P] :253 Monday :308 Min. : 800
## 2015/2016:293 2:904 1B:290 PL3236 : 72 1st Qu.: 4.00 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.0 1st Qu.: 0.0 NUS[P] :246 Tuesday :312 1st Qu.:1000
## 2014/2015:283 1C:176 PL3233 : 71 Median : 15.00 Median : 3.00 Median : 1.00 Median : 1.0 Median : 247.0 Return[P] :894 Wednesday:462 Median :1200
## 2012/2013:267 2A:243 PL3235 : 71 Mean : 22.65 Mean : 11.79 Mean : 64.12 Mean : 231.7 Mean : 689.3 ReturnNew[P]:310 Thursday :356 Mean :1293
## 2016/2017:205 2B:252 PL3234 : 70 3rd Qu.: 32.00 3rd Qu.: 8.00 3rd Qu.: 3.00 3rd Qu.: 101.0 3rd Qu.:1178.5 Friday :265 3rd Qu.:1500
## 2011/2012:176 3A:129 PL2132 : 63 Max. :197.00 Max. :208.00 Max. :2430.00 Max. :3459.0 Max. :4801.0 Max. :1800
## (Other) :150 3B:117 (Other):1282
# Draw one count chart per categorical variable.
# ModuleCode is not part of facnames.mod: it has too many levels (83) for
# this kind of exploratory graph.
for (fac in facnames.mod) {
  cat(paste0("Histogram Of ", fac))
  # Bars are filled by the same variable that is on the x axis.
  count_plot <- ggplot(data = mydata, aes_string(x = fac, fill = fac)) +
    geom_histogram(stat = "count") +
    ylab("Count") +
    ggtitle(paste0("Count of ", fac)) +
    theme_classic() +
    theme(
      axis.text.x = element_text(angle = 90, size = 6, vjust = -0.3),
      axis.title.x = element_blank(),
      legend.position = "none"
    )
  plot(count_plot)
}
## Histogram Of AcadYear
## Histogram Of Semester
## Histogram Of Round
## Histogram Of StudentAcctType
## Histogram Of DayText
# Draw one box plot per numeric variable.
for (num in numnames) {
  cat(paste0("Boxplots Of ", num))
  # The layer-level fill ("violetred") is what is drawn; the mapped fill in
  # aes_string() is kept as in the original call.
  box_plot <- ggplot(data = mydata, aes_string(x = num, fill = num)) +
    geom_boxplot(fill = "violetred", alpha = 0.5) +
    ylab("Histogram") +
    ggtitle(num) +
    theme_classic() +
    theme(
      axis.text.x = element_text(angle = 90, size = 6, vjust = -0.3),
      axis.title.x = element_text()
    )
  plot(box_plot)
}
## Boxplots Of Quota
## Boxplots Of Bidders
## Boxplots Of LowestBid
## Boxplots Of LowestSuccessfulBid
## Boxplots Of HighestBid
## Boxplots Of StartTime
# Heat map of joint counts for every unordered pair of categorical variables.
# The inner index always starts after the outer one, so each pair is drawn
# once and a variable is never paired with itself. seq_len() keeps the loops
# from running at all when fewer than two variables are available.
n_fac <- length(facnames.mod)
for (r in seq_len(max(n_fac - 1L, 0L))) {
  for (i in seq(r + 1L, n_fac)) {
    cat(paste0(facnames.mod[r], " ~ ", facnames.mod[i]))
    # One-sided formula `~ a + b` for xtabs(), built without eval(parse()).
    pair_formula <- reformulate(c(facnames.mod[r], facnames.mod[i]))
    # temp is a dataframe that is only going to exist in this section
    # and overwritten with each loop; as.data.frame() on the table yields the
    # two factor columns plus a Freq column.
    temp <- as.data.frame(xtabs(pair_formula, data = mydata))
    plot(
      ggplot(
        data = temp,
        aes_string(
          x = facnames.mod[r], y = facnames.mod[i],
          fill = "Freq", label = "Freq"
        )
      ) +
        geom_tile() +
        geom_text() +
        scale_fill_gradient(low = "white", high = "violetred") +
        theme_minimal() +
        theme(
          axis.text.x = element_text(angle = 90, vjust = -0.3),
          legend.position = "none"
        )
    )
  }
}
## AcadYear ~ Semester
## AcadYear ~ Round
## AcadYear ~ StudentAcctType
## AcadYear ~ DayText
## Semester ~ Round
## Semester ~ StudentAcctType
## Semester ~ DayText
## Round ~ StudentAcctType
## Round ~ DayText
## StudentAcctType ~ DayText
# Scatter plot with a fitted line for every unordered pair of numeric
# variables. numnames[r] goes on the x axis and numnames[i] on the y axis.
n_num <- length(numnames)
for (r in seq_len(max(n_num - 1L, 0L))) {
  for (i in seq(r + 1L, n_num)) {
    x_name <- numnames[r]
    y_name <- numnames[i]
    cat(paste0(x_name, " ~ ", y_name))
    # Formulas are built with as.formula() instead of eval(parse()).
    # standardized: both variables are passed through scale()
    stdreg <- lm(
      as.formula(paste0("scale(", y_name, ") ~ scale(", x_name, ")")),
      data = mydata
    )
    # unstandardized: supplies the intercept and slope of the dashed line
    reg <- lm(as.formula(paste0(y_name, " ~ ", x_name)), data = mydata)
    std_label <- paste0(
      "Standardized Regression Coefficient = ",
      round(stdreg$coefficients[2], 3)
    )
    plot(
      ggplot(data = mydata, aes_string(x = x_name, y = y_name)) +
        geom_point(color = "violetred", size = 2, alpha = 0.3) +
        theme_classic() +
        geom_abline(
          slope = reg$coefficients[2],
          intercept = reg$coefficients[1],
          lty = "dashed"
        ) +
        # annotate() draws the label once; a geom_label() layer with the text
        # inside aes() draws one copy per data row.
        annotate(
          "label",
          x = Inf, y = Inf, label = std_label,
          hjust = 1, vjust = 2
        ) +
        theme(axis.text.x = element_text(angle = 90, vjust = -0.3))
    )
  }
}
## Quota ~ Bidders
## Quota ~ LowestBid
## Quota ~ LowestSuccessfulBid
## Quota ~ HighestBid
## Quota ~ StartTime
## Bidders ~ LowestBid
## Bidders ~ LowestSuccessfulBid
## Bidders ~ HighestBid
## Bidders ~ StartTime
## LowestBid ~ LowestSuccessfulBid
## LowestBid ~ HighestBid
## LowestBid ~ StartTime
## LowestSuccessfulBid ~ HighestBid
## LowestSuccessfulBid ~ StartTime
## HighestBid ~ StartTime
# Correlation matrix of the numeric variables listed in numnames.time.
# Columns are picked by name. A "|"-joined regex over names(mydata) would
# also match any column whose name merely contains one of these names, and
# would match every column if numnames.time were empty.
corrplot.mixed(
  cor(mydata[, numnames.time, drop = FALSE]),
  upper = "color",
  tl.pos = "lt",
  tl.cex = 0.5,
  cl.cex = 0.5
)
# Box plot of every numeric variable split by every categorical variable.
for (r in facnames.mod) {
  for (i in numnames) {
    cat(paste0(r, " ~ ", i))
    # graph
    plot(
      ggplot(data = mydata, aes_string(x = r, y = i, fill = r)) +
        geom_boxplot() +
        theme_classic() +
        theme(
          legend.position = "none",
          axis.text.x = element_text(angle = 90, vjust = -0.3)
        )
    )
  }
}
## AcadYear ~ Quota
## AcadYear ~ Bidders
## AcadYear ~ LowestBid
## AcadYear ~ LowestSuccessfulBid
## AcadYear ~ HighestBid
## AcadYear ~ StartTime
## Semester ~ Quota
## Semester ~ Bidders
## Semester ~ LowestBid
## Semester ~ LowestSuccessfulBid
## Semester ~ HighestBid
## Semester ~ StartTime
## Round ~ Quota
## Round ~ Bidders
## Round ~ LowestBid
## Round ~ LowestSuccessfulBid
## Round ~ HighestBid
## Round ~ StartTime
## StudentAcctType ~ Quota
## StudentAcctType ~ Bidders
## StudentAcctType ~ LowestBid
## StudentAcctType ~ LowestSuccessfulBid
## StudentAcctType ~ HighestBid
## StudentAcctType ~ StartTime
## DayText ~ Quota
## DayText ~ Bidders
## DayText ~ LowestBid
## DayText ~ LowestSuccessfulBid
## DayText ~ HighestBid
## DayText ~ StartTime
# Level
# create new variable that indicates the level of the module, based on their module code
# The first pattern that matches wins: a 1 followed by three digits is
# "Level 1", and so on up to 4; every other code is labelled "Graduate Module".
mydata$Level <- ifelse(
  str_detect(mydata$ModuleCode, "1[0-9][0-9][0-9]"), "Level 1",
  ifelse(
    str_detect(mydata$ModuleCode, "2[0-9][0-9][0-9]"), "Level 2",
    ifelse(
      str_detect(mydata$ModuleCode, "3[0-9][0-9][0-9]"), "Level 3",
      ifelse(
        str_detect(mydata$ModuleCode, "4[0-9][0-9][0-9]"), "Level 4",
        "Graduate Module"
      )
    )
  )
)
# Bidders Per Quota
# NOTE(review): `BidPerQuota` is not created anywhere in the visible code;
# presumably it is Bidders / Quota computed in a step not shown here - confirm.
# One plot per module: BidPerQuota against StartTime, coloured by Semester and
# labelled with AcadYear.
for (r in unique(mydata$ModuleCode)) {
  plot(
    mydata %>%
      filter(ModuleCode == r) %>%
      ggplot(aes(x = StartTime, y = BidPerQuota, color = Semester, label = AcadYear)) +
      geom_label_repel(fill = "black") +
      geom_point() +
      ggtitle(r) +
      theme_bw() +
      theme(
        axis.text.x = element_text(angle = 90, vjust = -0.3),
        legend.position = "none"
      ) +
      # fixed axis ranges so the per-module plots share the same scales
      xlim(c(800, 1800)) +
      ylim(c(0, 15)) +
      # dashed reference lines at StartTime 1200 and 1700
      geom_vline(xintercept = 1200, color = "violetred", lty = "dashed") +
      geom_vline(xintercept = 1700, color = "darkblue", lty = "dashed")
  )
}